fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Redirected link is identical because of 'URL Hack' option: %s%s and %s%s"LF, urladr, urlfil, mov_adr, mov_fil);
test_flush;
}
}
}
//if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) { // ok URL reconnue
// it is (roughly) the same URL..
// if the only difference is the case of the host name, then the server is buggy
// ("RFC says.." : host name IS case insensitive)
if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) { // identique α casse prΦs
// on tourne en rond
if (strcmp(mov_fil,urlfil)==0) {
error=1;
get_it=-1; // ne rien faire
if (opt->errlog) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
test_flush;
}
} else { // mauvaise casse, effacer entrΘe dans la pile et rejouer une fois
get_it=1;
}
} else { // adresse diffΘrente
if (ishtml(mov_url)==0) { // pas mΩme adresse MAIS c'est un fichier non html (pas de page moved possible)
// -> fetch it at this address; the link will be recorded with lien_record() (hash)
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
test_flush;
}
// accepted?
if (hts_acceptlink(opt,ptr,lien_tot,liens,
mov_adr,mov_fil,
NULL, NULL,
&set_prio_to,
NULL) != 1) { /* nouvelle adresse non refusΘe ? */
get_it=1;
if ((opt->debug>1) && (opt->log!=NULL)) {
fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
// set_prio_to=0+1; // protection if the moved URL is an html page!!
//xxc xxc
{
char BIGSTK mov_sav[HTS_URLMAXSIZE*2];
// compute the link and, if needed, modify address/file
if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,sback,cache,hash,ptr,numero_passe,NULL)!=-1) {
if (hash_read(hash,mov_sav,"",0,0)<0) { // n'existe pas dΘja
// metaphysical note: there may be both an index.html and an INDEX.HTML
// which does not work very well under DOS... but since I am brilliant, url_savename()
// is able to handle this problem
}
} // ident_url_xx
if (get_it==0) { // adresse vraiment diffΘrente et potentiellement en html (pas de possibilitΘ de bouger la page tel quel α cause des <img src..> et cie)
rn=(char*) calloct(8192,1);
if (rn!=NULL) {
if (opt->errlog) {
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
} else {
if (r->statuscode==STATUSCODE_TEST_OK) { // test OK
if ((opt->debug>0) && (opt->errlog!=NULL)) {
fspc(opt->errlog,"info");
fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
}
} else {
if (strcmp(urlfil,"/robots.txt")) { // ne pas afficher d'infos sur robots.txt par dΘfaut
fspc(opt->errlog,"error");
fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
} else {
if (opt->debug>1) {
fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr);
test_flush;
}
}
}
}
test_flush;
}
// NO error in top level
// due to the "no connection -> previous restored" hack
// This prevents the engine from wiping all data if the website has been deleted (or moved)
// since last time (which is quite annoying)
if (liens[ptr]->precedent != 0) {
// here we test whether the page should be stored anyway
if (opt->errpage) {
store_errpage=1;
}
} else {
if (strcmp(urlfil,"/robots.txt") != 0) {
/*
This is an error caused by a link entered by the user
That is, link(s) entered by user are invalid (404, 500, connect error, proxy error...)
If all links entered are invalid, the session failed and we will attempt to restore
the previous one
Example: Try to update a website which has been deleted remotely: this may delete
the website locally, which is really not desired (especially if the website disappeared!)
With this hack, the engine won't wipe local files (how clever)
*/
HTS_STAT.stat_errors_front++;
}
}
} else { // retry!!
if (opt->debug>0 && opt->errlog != NULL) { // on fera un alert si le retry Θchoue
fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes);
if (back_add(sback,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {